1 package org.apache.lucene.search.vectorhighlight;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import org.apache.lucene.analysis.MockAnalyzer;
21 import org.apache.lucene.document.Document;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.document.FieldType;
24 import org.apache.lucene.document.TextField;
25 import org.apache.lucene.index.DirectoryReader;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.index.IndexWriterConfig;
29 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
30 import org.apache.lucene.index.RandomIndexWriter;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.search.BooleanClause;
33 import org.apache.lucene.search.BooleanQuery;
34 import org.apache.lucene.search.Query;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
37 import org.apache.lucene.store.Directory;
38 import org.apache.lucene.util.TestUtil;
39
40 import java.util.ArrayList;
41 import java.util.HashMap;
42 import java.util.HashSet;
43 import java.util.List;
44 import java.util.Map;
45 import java.util.Set;
46
47 public class SimpleFragmentsBuilderTest extends AbstractTestCase {
48
49 public void test1TermIndex() throws Exception {
50 FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" );
51 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
52 assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
53
54
55 sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
56 assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
57 }
58
59 public void test2Frags() throws Exception {
60 FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b a b a b" );
61 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
62 String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
63
64 assertEquals( 2, f.length );
65 assertEquals( "<b>a</b> b b b b b b b b b b", f[0] );
66 assertEquals( "b b <b>a</b> b <b>a</b> b", f[1] );
67 }
68
69 public void test3Frags() throws Exception {
70 BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
71 booleanQuery.add(new TermQuery(new Term(F, "a")), BooleanClause.Occur.SHOULD);
72 booleanQuery.add(new TermQuery(new Term(F, "c")), BooleanClause.Occur.SHOULD);
73
74 FieldFragList ffl = ffl(booleanQuery.build(), "a b b b b b b b b b b b a b a b b b b b c a a b b" );
75 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
76 String[] f = sfb.createFragments( reader, 0, F, ffl, 3 );
77 assertEquals( 3, f.length );
78 assertEquals( "<b>a</b> b b b b b b b b b b", f[0] );
79 assertEquals( "b b <b>a</b> b <b>a</b> b b b b b c", f[1] );
80 assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[2] );
81 }
82
83 public void testTagsAndEncoder() throws Exception {
84 FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "<h1> a </h1>" );
85 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
86 String[] preTags = { "[" };
87 String[] postTags = { "]" };
88 assertEquals( "<h1> [a] </h1>",
89 sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
90 }
91
92 private FieldFragList ffl(Query query, String indexValue ) throws Exception {
93 make1d1fIndex( indexValue );
94 FieldQuery fq = new FieldQuery( query, true, true );
95 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
96 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
97 return new SimpleFragListBuilder().createFieldFragList( fpl, 20 );
98 }
99
100 public void test1PhraseShortMV() throws Exception {
101 makeIndexShortMV();
102
103 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
104 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
105 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
106 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
107 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
108 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
109
110 assertEquals( " a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
111 }
112
113 public void test1PhraseLongMV() throws Exception {
114 makeIndexLongMV();
115
116 FieldQuery fq = new FieldQuery( pqF( "search", "engines" ), true, true );
117 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
118 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
119 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
120 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
121 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
122 assertEquals( "customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can",
123 sfb.createFragment( reader, 0, F, ffl ) );
124 }
125
126 public void test1PhraseLongMVB() throws Exception {
127 makeIndexLongMVB();
128
129 FieldQuery fq = new FieldQuery( pqF( "sp", "pe", "ee", "ed" ), true, true );
130 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
131 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
132 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
133 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
134 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
135 assertEquals( "additional hardware. \nWhen you talk about processing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
136 }
137
138 public void testUnstoredField() throws Exception {
139 makeUnstoredIndex();
140
141 FieldQuery fq = new FieldQuery( tq( "aaa" ), true, true );
142 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
143 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
144 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
145 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
146 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
147 assertNull( sfb.createFragment( reader, 0, F, ffl ) );
148 }
149
150 protected void makeUnstoredIndex() throws Exception {
151 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE));
152 Document doc = new Document();
153 FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
154 customType.setStoreTermVectors(true);
155 customType.setStoreTermVectorOffsets(true);
156 customType.setStoreTermVectorPositions(true);
157 doc.add( new Field( F, "aaa", customType) );
158
159 writer.addDocument( doc );
160 writer.close();
161 if (reader != null) reader.close();
162 reader = DirectoryReader.open(dir);
163 }
164
165 public void test1StrMV() throws Exception {
166 makeIndexStrMV();
167
168 FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
169 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
170 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
171 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
172 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
173 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
174 sfb.setMultiValuedSeparator( '/' );
175 assertEquals( "abc/<b>defg</b>/hijkl", sfb.createFragment( reader, 0, F, ffl ) );
176 }
177
178 public void testMVSeparator() throws Exception {
179 makeIndexShortMV();
180
181 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
182 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
183 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
184 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
185 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
186 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
187 sfb.setMultiValuedSeparator( '/' );
188 assertEquals( "//a b c//<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
189 }
190
191 public void testDiscreteMultiValueHighlighting() throws Exception {
192 makeIndexShortMV();
193
194 FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
195 FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
196 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
197 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
198 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
199 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
200 sfb.setDiscreteMultiValueHighlighting(true);
201 assertEquals( "<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
202
203 make1dmfIndex("some text to highlight", "highlight other text");
204 fq = new FieldQuery( tq( "text" ), true, true );
205 stack = new FieldTermStack( reader, 0, F, fq );
206 fpl = new FieldPhraseList( stack, fq );
207 sflb = new SimpleFragListBuilder();
208 ffl = sflb.createFieldFragList( fpl, 32 );
209 String[] result = sfb.createFragments(reader, 0, F, ffl, 3);
210 assertEquals(2, result.length);
211 assertEquals("some <b>text</b> to highlight", result[0]);
212 assertEquals("highlight other <b>text</b>", result[1]);
213
214 fq = new FieldQuery( tq( "highlight" ), true, true );
215 stack = new FieldTermStack( reader, 0, F, fq );
216 fpl = new FieldPhraseList( stack, fq );
217 sflb = new SimpleFragListBuilder();
218 ffl = sflb.createFieldFragList( fpl, 32 );
219 result = sfb.createFragments(reader, 0, F, ffl, 3);
220 assertEquals(2, result.length);
221 assertEquals("text to <b>highlight</b>", result[0]);
222 assertEquals("<b>highlight</b> other text", result[1]);
223 }
224
225 public void testRandomDiscreteMultiValueHighlighting() throws Exception {
226 String[] randomValues = new String[3 + random().nextInt(10 * RANDOM_MULTIPLIER)];
227 for (int i = 0; i < randomValues.length; i++) {
228 String randomValue;
229 do {
230 randomValue = TestUtil.randomSimpleString(random());
231 } while ("".equals(randomValue));
232 randomValues[i] = randomValue;
233 }
234
235 Directory dir = newDirectory();
236 RandomIndexWriter writer = new RandomIndexWriter(
237 random(),
238 dir,
239 newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
240
241 FieldType customType = new FieldType(TextField.TYPE_STORED);
242 customType.setStoreTermVectors(true);
243 customType.setStoreTermVectorOffsets(true);
244 customType.setStoreTermVectorPositions(true);
245
246 int numDocs = randomValues.length * 5;
247 int numFields = 2 + random().nextInt(5);
248 int numTerms = 2 + random().nextInt(3);
249 List<Doc> docs = new ArrayList<>(numDocs);
250 List<Document> documents = new ArrayList<>(numDocs);
251 Map<String, Set<Integer>> valueToDocId = new HashMap<>();
252 for (int i = 0; i < numDocs; i++) {
253 Document document = new Document();
254 String[][] fields = new String[numFields][numTerms];
255 for (int j = 0; j < numFields; j++) {
256 String[] fieldValues = new String[numTerms];
257 fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
258 StringBuilder builder = new StringBuilder(fieldValues[0]);
259 for (int k = 1; k < numTerms; k++) {
260 fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
261 builder.append(' ').append(fieldValues[k]);
262 }
263 document.add(new Field(F, builder.toString(), customType));
264 fields[j] = fieldValues;
265 }
266 docs.add(new Doc(fields));
267 documents.add(document);
268 }
269 writer.addDocuments(documents);
270 writer.close();
271 IndexReader reader = DirectoryReader.open(dir);
272
273 try {
274 int highlightIters = 1 + random().nextInt(120 * RANDOM_MULTIPLIER);
275 for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++) {
276 String queryTerm = randomValues[random().nextInt(randomValues.length)];
277 int randomHit = valueToDocId.get(queryTerm).iterator().next();
278 List<StringBuilder> builders = new ArrayList<>();
279 for (String[] fieldValues : docs.get(randomHit).fieldValues) {
280 StringBuilder builder = new StringBuilder();
281 boolean hit = false;
282 for (int i = 0; i < fieldValues.length; i++) {
283 if (queryTerm.equals(fieldValues[i])) {
284 builder.append("<b>").append(queryTerm).append("</b>");
285 hit = true;
286 } else {
287 builder.append(fieldValues[i]);
288 }
289 if (i != fieldValues.length - 1) {
290 builder.append(' ');
291 }
292 }
293 if (hit) {
294 builders.add(builder);
295 }
296 }
297
298 FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
299 FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);
300
301 FieldPhraseList fpl = new FieldPhraseList(stack, fq);
302 SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
303 FieldFragList ffl = sflb.createFieldFragList(fpl, 300);
304
305 SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
306 sfb.setDiscreteMultiValueHighlighting(true);
307 String[] actualFragments = sfb.createFragments(reader, randomHit, F, ffl, numFields);
308 assertEquals(builders.size(), actualFragments.length);
309 for (int i = 0; i < actualFragments.length; i++) {
310 assertEquals(builders.get(i).toString(), actualFragments[i]);
311 }
312 }
313 } finally {
314 reader.close();
315 dir.close();
316 }
317 }
318
319 private String getRandomValue(String[] randomValues, Map<String, Set<Integer>> valueToDocId, int docId) {
320 String value = randomValues[random().nextInt(randomValues.length)];
321 if (!valueToDocId.containsKey(value)) {
322 valueToDocId.put(value, new HashSet<Integer>());
323 }
324 valueToDocId.get(value).add(docId);
325 return value;
326 }
327
328 private static class Doc {
329
330 final String[][] fieldValues;
331
332 private Doc(String[][] fieldValues) {
333 this.fieldValues = fieldValues;
334 }
335 }
336
337 }